clear
clear mata
clear matrix

cd ..                                                                            /* Goes back to parent folder */

capture log close
log using "log/13_Dust_Bowl.log", replace

mkdir "tmp"                                                                      /* creates a temporary folder to store temp files */

*       Patriotism!

*       FIRST VERSION  June       7, 2022
*       THIS VERSION   June       7, 2022
*       LAST RUN       June       7, 2022

*       LAST REVISOR		BC

*       Log of revisions:

*       This processes an indicator for being hit by the Dust Bowl
*       The code consists of two main sections:
*       A. takes the erosions variables to the New Deal county map
*       B. recreates an indicator for whether the county is in the Great Plains (Hornbeck main sample)
*       The final Dust Bowl indicator is =1 for counties with erosion > 0.5 AND in Hornbeck's main sample

***************************************************************************************************
****                              PLAN OF THE PROCEDURE                                        ****
****___________________________________________________________________________________________****         
****                                                                                           ****
****                  A. Levels of erosion (entire US)                                         ****
****      1. Assume new data on erosion levels						                           ****
****      2. Merge with bridge between fips and icpsr codes			                           ****
****      3. Merge with the territorial division of 1930		 	                          ****
****      4. Merge with New Deal codes                       	 	                           ****
****      5. Collapse vars at nd level								                           ****
****                  B. Main sample of Hornbeck                                               ****
****      1. Replicate everything Richard does to recreate the main sample                     ****
****      3. Merge with ICPSR codes			     	 	                                       ****
****      4. Merge with New Deal codes                       	 	                           ****
****                  C. Merge the two database and create Dust Bowl indicator                 ****
****                  D. Erase junk 		                                                   ****
****                                                										   ****
***************************************************************************************************
set mem 500m
set maxvar 32000
set scheme s1color
set more off, perm
clear all

***************************************************************************************************
****                  A. Levels of erosion (entire US)                                         ****
****      1. Assume new data on erosion levels						                           ****
***************************************************************************************************
do "code-build/01_Shopping_List.do"

use "rawdata/Hornbeck/DustBowl_All_base1910", replace

keep if year == 1930
keep fips m1_0 m1_1 m1_2 

***************************************************************************************************
****      2. Merge with bridge between fips and icpsr codes			                           ****
***************************************************************************************************
preserve
	use "rawdata/Hornbeck/icpsr_fips", replace
	drop if fips == .															/* 2 missing */
	save "tmp/icpsr_fips", replace
restore

merge 1:1 fips using "tmp/icpsr_fips", nogen keep(2 3)							/* 10 not matching from master have missing state and county id */

***************************************************************************************************
****      3. Merge with the territorial division of 1930		 	                          ****
***************************************************************************************************
ren state  ICPSRST
ren county ICPSRCTY

tostring  ICPSRST ICPSRCTY, replace
merge 1:1 ICPSRST ICPSRCTY using "rawdata/NHGIS/US_county_1930", nogen keepus(SHAPE_AREA)

***************************************************************************************************
****      4. Merge with New Deal codes                       	 	                           ****
***************************************************************************************************
destring ICPSRST ICPSRCTY, replace
ren ICPSRST  stateicpsr
ren ICPSRCTY countyicpsr

merge 1:1 stateicpsr countyicpsr using "rawdata/maps/Bridge"

***************************************************************************************************
****      5. Collapse vars at nd level								                           ****
***************************************************************************************************
for VAR in var m1_1 m1_0 m1_2 : replace VAR = VAR * SHAPE_AREA
collapse (sum) m1_1 m1_0 m1_2 SHAPE_AREA, by(state stateicpsr countynd)
for VAR in var m1_1 m1_0 m1_2 : replace VAR = VAR / SHAPE_AREA

save "tmp/dustbowl", replace

***************************************************************************************************
****                  B. Main sample of Hornbeck                                               ****
****      1. Replicate everything Richard does to recreate the main sample                     ****
***************************************************************************************************
use "rawdata/Hornbeck/DustBowl_All_base1910", replace

*** create sample ***
	/*keep only the 12 Dust Bowl states*/
	keep if state==8|state==19|state==20|state==27|state==30|state==31|state==35|state==38|state==40|state==46|state==48|state==56
	drop if year==1935

*** IDs ***
sort fips year
	/*for any county-year pairs missing a county acres value, assign the nearest value for the county*/
	by fips: replace county_acres = county_acres[_n+1] if county_acres==.
	by fips: replace county_acres = county_acres[_n+1] if county_acres==.
	by fips: replace county_acres = county_acres[_n-1] if county_acres==.

/*create state names*/
gen     sname = "Colorado" if state==8
replace sname = "Iowa" if state==19
replace sname = "Kansas" if state==20
replace sname = "Minnesota" if state==27
replace sname = "Montana" if state==30
replace sname = "Nebraska" if state==31
replace sname = "New Mexico" if state==35
replace sname = "North Dakota" if state==38
replace sname = "Oklahoma" if state==40
replace sname = "South Dakota" if state==46
replace sname = "Texas" if state==48
replace sname = "Wyoming" if state==56

*** Generate outcome and control variables ***
	/*farmland*/
		/*restrict sample to only county-year pairs with data on acres of farmland*/
		drop if farmland==.
		/*create fraction of county-year pair acres that are farmland*/
		gen farmland_a = farmland/county_acres
	/*cropland*/
		/*create fraction of farmland that is cropland*/
		gen cropland_f = cropland/farmland
		/*create fraction of cropland that is fallow-- for only counties with more than 1000 acres of cropland*/
		gen fallow_c = cropland_fallow/cropland if cropland>1000
	/*farm size*/
		/*create average farm size variable*/
		gen avsize = farmland/farms
		/*create farms per acre */
		gen farms_a = farms/county_acres
	/*population*/	
		/*create population per acre*/
		gen population_a = population/county_acres
		/*create fraction of population that lives in rural areas*/
		gen fraction_rural = population_rural/population
		/*create fraction of population that lives on a farm*/
		gen fraction_farm = population_farm/population
	/*value of farms*/
		/*create log of value of farm land and buildings per acre*/
		gen value_landbuildings_f = ln(value_landbuildings/farmland)
		/*Drop two outlier errors*/
		replace value_landbuildings_f = . if value_landbuildings_f<-1
		/*create log of value of farmland per acre*/
		gen value_land_f = ln(value_land/farmland)
		/*revenue*/
		/*fill in missing value of animal products using 1940 base*/
		replace value_animalproducts = value_animalproducts_base1940 if (year==1920|year==1925|year==1930)&value_animalproducts==.
		replace value_all = value_crops+value_animalproducts if (year==1920|year==1925|year==1930)&value_all==.
		/*create log of revenue of farmland per acre*/
		gen value_revenue_f = ln(value_all/farmland)

		/*acres of crops*/
		/*all corn*/
		egen corn_a = rsum(corn_grain_a corn_silage_a)
		/*all grains*/
		egen obr_a = rsum(oats_a barley_a rye_a)
		/*create fraction of cropland in each particular crop*/
		foreach i of varlist hay_a wheat_a corn_a cotton_a obr_a {
			gen `i'_c = `i'/cropland if cropland>1000
			replace `i'_c = 0 if `i'_c==.
		}
	/*livestock per acre*/
	foreach i of varlist cows pigs chickens {
		gen `i'_a = `i'/county_acres
	replace `i'_a = 0 if `i'_a==.
	}
	/*make tenant farmland all the farmland that's not owned or managed*/
	replace farmland_tenant = farmland-farmland_own-farmland_manager if farmland_tenant==.
		
*** Balance sample and key variables ***	
		replace cropland = . if year > 1974
		replace value_crops=. if year>1964
		replace pasture = . if year>1964
		replace population = . if year==1997
		replace value_revenue_f = . if year==1997
		replace value_landbuildings_f = . if year==1974
		replace value_all = . if year==1997
		replace value_landbuildings = . if year==1974
		
		/*count the number of years for which we have data for each county on the following variables:*/
		sort fips
			/*value of all land and buildings*/
			by fips: egen balance_value_landbuildings_f= count(value_landbuildings_f)
			drop if balance_value_landbuildings_f!=15
			/*revenue*/
			by fips: egen balance_value_revenue_f = count(value_revenue_f)
			drop if balance_value_revenue_f!=15
			/*cropland*/
			by fips: egen balance_cropland = count(cropland)
			drop if balance_cropland!=10
			/*population*/
			by fips: egen balance_population = count(population)
			drop if balance_population!=9
			/*rural population*/
			by fips: egen balance_population_rural = count(population_rural)
			drop if balance_population_rural!=8
			drop balance_value_landbuildings_f balance_value_revenue_f balance_cropland balance_population balance_population_rural
			
*** Drop "Non-Plains" counties in these states
		drop if frac_grassland_tot <.5
*** Drop non-contiguous counties failing the grassland restriction
		drop if fips == 48043 | fips == 48243 | fips == 35013 | fips == 35017 | fips == 30007
        
		keep if year == 1930
		
***************************************************************************************************
****      3. Merge with ICPSR codes			     	 	                                       ****
***************************************************************************************************
drop state
merge m:1 fips using "tmp/icpsr_fips", nogen keep(3)							/* 10 not matching from master have missing state and county id */


***************************************************************************************************
****      4. Merge with New Deal codes                       	 	                           ****
***************************************************************************************************
ren state  stateicpsr
ren county countyicpsr

merge 1:1 stateicpsr countyicpsr using "rawdata/maps/Bridge", nogen keep(3)

gen sample_db = 1

collapse (max) sample_db , by(state stateicpsr countynd)
		
save "tmp/sample_db",  replace		

***************************************************************************************************
****                  C. Merge the two database and create Dust Bowl indicator                 ****
***************************************************************************************************
use "tmp/dustbowl", replace

merge 1:1 state stateicpsr countynd using "tmp/sample_db", nogen keep(1 3)

recode sample_db (. = 0)

gen erosion = m1_1 + m1_2                                                       /* Share of land with top-soil erosion */
gen dust_bowl = erosion > 0.5 & erosion != . if sample_db == 1                  /* Dust Bowl county has more than 50% of land with top-soil erosion and it's in the Great Plains */

keeporder stateicpsr countynd dust_bowl
drop if countynd == .                                                           /* These 51 counties could not be matched */

lab var dust_bowl "=1 if Dust Bowl county (Hornbeck)"

save "data/Dust_Bowl", replace

***************************************************************************************************
****                  D. Erase junk 		                                                   ****
***************************************************************************************************
erase "tmp/icpsr_fips.dta"
erase "tmp/dustbowl.dta"
erase "tmp/sample_db.dta"

rmdir "tmp"

log close
exit